## Code that creates figures with logit results.
## Last changed: 2025-05-23

sink("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Output/FigureA9A10A11A12.txt", split=TRUE)

library(PSweight)
library(cobalt)
library(MatchIt)
library(distances)
library(moreparty)
library(party)
library(caret)
library(bonsai)
library(randomForest)
library(tidymodels)
library(tidyverse)
library(rmarkdown)
library(tinytex)
library(tibble)
library(knitr)
library(car)
library(fst)
library(stargazer)
library(pryr)
library(stringr)
library(fst)
library(lubridate)
library(data.table)
library(summarytools)
library(ggplot2)
library(cowplot)
library(ggpubr)
library(haven)
library(broom)
library(fixest)
library(modelsummary)
library(gt)
library(MASS)
library(survey)
library(collapse)
library(readxl)
library(Hmisc)
library(DescTools)
library(ranger)
library(stablelearner)
library(future.apply)
library(lme4)
library(partykit)
library(ggiplot)
library(MASS)
library(clarify)
library(parallel)
 
  set.seed(1234)
  
  # Set number of simulations to use when calculating the standard errors
  nsim <- 500
  
  # Specify the name of the treatment variable
  tvar <- "T94_90"
  
  # Specify the the birth cohorts to be used for the analysis
  lbyr <- 1942
  ubyr <- 1967
  
  # Specify if only include Swedish citizens 
  citizen <- 1
  
  # Specify the propensity score specification
  psformula <- formula(T94 ~
                         Kon + DAStod_1982 + DAStod_1983 + DAStod_1984 +
                         DAStod_1985 + DAStod_1986 + DAStod_1987 + DAStod_1988 +
                         DAStod_1989 +
                         DAntBarn_91 + Sambo_91 + InvBak  |
                         SSYK3_90 + SNI2_91 + Kommun_91 +
                         rCARB_1982 + rCARB_1983 + rCARB_1984 + rCARB_1985 +
                         rCARB_1986 + rCARB_1987 + rCARB_1988 + rCARB_1989 +
                         pArbInk_1990 + pArbInk_1991 + UtbAr_91 +
                         FodAr + Nom82 + Nom85 + Nom88 + Nom91)
  
  
  Models <-  vector('list', 5)
  lModels <-  vector('list', 5)
  j <- 1
  
  for(i in 1:5) { 
    
    mycl<-makeCluster(2 ,type="SOCK")
    
    # Read in the data sets
    NomVald <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_NomVald.fst", as.data.table=T)
    RTB <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_RTB.fst", as.data.table=T)
    db <- read_fst("E:/ProjData/Jan_Kalle/JoPReplications/DB_Nom.fst", as.data.table=T)
    
    db[, T94 := get(tvar)]
    mdb <- db[between(FodAr, lbyr, ubyr) & (SvMedb82_18 >= citizen) & AllaVal==1,]
    rm(db)
    gc()
    
    mdb[!is.na(avSES3), ravSES := cut_number(avSES3, 4, labels = FALSE)]
    if(i==1) mdb <- mdb[between(ravSES, 1, 1)]
    if(i==2) mdb <- mdb[between(ravSES, 2, 2)]
    if(i==3) mdb <- mdb[between(ravSES, 3, 3)]
    if(i==4) mdb <- mdb[between(ravSES, 4, 4)]
    
    # Use logit model to estimate propensity scores
    mdb <- mdb[complete.cases(mdb),]
    tm <- proc.time()
    logmod <- feglm(psformula,  
                    data = mdb, family="binomial")
    proc.time()-tm
    print(summary(logmod))
    
    # Extract predicted probability (propensity scores)
    if(logmod$nobs == logmod$nobs_origin){ 
      mdb[, pT94 := predict(logmod, type = "response")]
    }else{
      mdb[logmod$obs_selection$obsRemoved, pT94 := predict(logmod, type = "response")]
    }
    
    mdb <- mdb[complete.cases(mdb),]
    
    # Now use the Full matching approach of Sävje et al. to obtain ATT and ATE weights.  
    
    tmp <- proc.time()
    set.seed(7892)
    m.out <- matchit(T94~1, data = mdb,
                     distance = mdb$pT94, method = "quick", estimand = "ATT")
    proc.time()-tmp
    m.data1 <- as.data.table(match.data(m.out))[, .(LopNr, weights)]
    setnames(m.data1, "weights", "gm_att")
    
    mdb <- m.data1[, .(LopNr, gm_att)][mdb, on = "LopNr"]
    gc()
    RTB <- mdb[RTB, on = "LopNr"]
    
    rm(mdb)
    gc()
    
    
    mdb <- RTB[!is.na(gm_att), .(Nom, T94, Ar, LopNr, gm_att)]
    rm(list=setdiff(ls(), c("mdb", "mycl", "Models", "lModels", "i", "tvar", "lbyr", "ubyr", "citizen", "psformula", "nsim")))
    gc()
      
    model <- feglm(Nom~T94+i(Ar, T94, ref=1991) | Ar, cluster ="LopNr", family="binomial", 
                     weights = mdb[, gm_att], 
                     mdb[])
    
    lModels[[i]] <-model
    
    print(summary(model))
    print(uniqueN(mdb[T94==0, LopNr]))
    print(uniqueN(mdb[T94==1, LopNr]))
    print(uniqueN(mdb[, LopNr]))
    print(dim(mdb))
    
  gc()

  clusterExport(cl=mycl, c('mdb', 'model'))
  
# Simulate coefficients from the model
s <- sim(model, n = nsim)


# Define the function that calculates the difference between the predicted probabilities
my_fun <- function(fit) {
  
  ddest <- c()
  for(ar in c(1982, 1985, 1988, 1994, 1998, 2002, 2006, 2010, 2014, 2018, 2022 )) {
  
    par <- (predict(fit, newdata = data.frame(T94 = 1, Ar=ar), type = "response")- 
            predict(fit, newdata = data.frame(T94 = 1, Ar=1991), type = "response"))-
           (predict(fit, newdata = data.frame(T94 = 0, Ar=ar), type = "response")- 
            predict(fit, newdata = data.frame(T94 = 0, Ar=1991), type = "response"))
  
    ddest <- c(ddest, par)
  }
 
  
  return(ddest)
  
}
tmp <- proc.time()
# Apply the function to the simulated coefficients earlier
est1 <- sim_apply(s, FUN = my_fun, cl=mycl)
proc.time()-tmp

stopCluster(mycl)

  gc()

  pmod <- summary(est1, ci = TRUE, level = 0.95, method = "quantile")
  pmod <- as.data.table(pmod)
  setnames(pmod, names(pmod), c("DD_est", "CI_low", "CI_high"))
  ar <- c(1982, 1985, 1988, 1994, 1998, 2002, 2006, 2010, 2014, 2018, 2022)
  pmod[, Ar := ar]
  ref <- as.data.table(cbind(DD_est=0, CI_low=0, CI_high=0, Ar=1991))
  pmod <- rbindlist(list(pmod, ref), use.names=T)
  setorder(pmod, Ar)
  pmod[, DD_est := 100*DD_est]
  pmod[, CI_low := 100*CI_low]
  pmod[, CI_high := 100*CI_high]

  Models[[i]] <- pmod
  } 
  
  valar <- c(1982, 1985, 1988, 1991, 1994, 1998, 2002, 2006, 2010, 2014, 2018, 2022)
  
  for(i in 1:5) {
    
    ses <- paste0("Quartile ", i, " SES") 
    
    gi <- ggiplot(lModels[[i]]) 
    gp <- ggplot(gi$data, aes(x, estimate)) +
          geom_point() +
          geom_errorbar(aes(ymin=ci_low,ymax=ci_high), width=0.4, size=.6) +
          theme_classic(base_size = 13) +
          xlab("Election Year") +
          ylab("Logit Coefficient") +
          scale_x_continuous(breaks = valar,  labels = valar, limits = c(1981, 2023)) +
          geom_hline(yintercept=0, linetype="longdash") +
          geom_line(linetype="dotted") +
          ggtitle(ses) +
          scale_y_continuous(limits = c(-.3, .6), breaks = round(seq(-.3, .6, by = .1), digits=1)) + 
          theme(legend.position = "bottom", panel.grid.major.y = element_line(color = "grey95"))
    
    #ggsave(paste0("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Figures/Fig_LogitCoef_", i, ".pdf"), width = 15, height =10, units = "in") 
    
    assign(paste0("gp", i), gp)
  }
  ggarrange(gp1, gp2, gp3, gp4)
  ggsave("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Figures/FigureA10.pdf", width = 15, height =10, units = "in") 
  
  gp5 <- gp5 + 
    ggtitle("") +
    scale_y_continuous(limits = c(-.15, .3), breaks = round(seq(-.15, .3, by = .05), digits=2))
  gp5
  ggsave("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Figures/FigureA9.pdf", width = 15, height =10, units = "in")
  
  rm(gp1, gp2, gp3, gp4, gp5)
  
for(i in 1:5) {
    
    ses <- paste0("Quartile ", i, " SES") 
    
    pmod <- Models[[i]]
    
  gp <- ggplot(pmod, aes(Ar, DD_est)) +
    geom_point() +
    geom_errorbar(aes(ymin=CI_low,ymax=CI_high), width=0.4, size=.6) +
    theme_classic(base_size = 13) +
    xlab("Election Year") +
    ylab("Effect on Candicacy") +
    scale_x_continuous(breaks = valar,  labels = valar, limits = c(1981, 2023)) +
    geom_hline(yintercept=0, linetype="longdash") +
    geom_line(linetype="dotted") +
    ggtitle(ses) +
    scale_y_continuous(limits = c(-.3, .9), breaks = round(seq(-.3, 1, by = .1), digits=1)) + 
    theme(legend.position = "bottom", panel.grid.major.y = element_line(color = "grey95"))
    
    #ggsave(paste0("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Figures/Fig_Logit_", i, ".pdf"), width = 15, height =10, units = "in") 
  
    assign(paste0("gp", i), gp)
}
  ggarrange(gp1, gp2, gp3, gp4)
  ggsave("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Figures/FigureA12.pdf", width = 15, height =10, units = "in") 

  gp5 <- gp5 + 
    ggtitle("") +
    scale_y_continuous(limits = c(-.1, .3), breaks = round(seq(-.1, .3, by = .05), digits=2))
  gp5
  ggsave("C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Figures/FigureA11.pdf", width = 15, height =10, units = "in")
  
  modelsummary(lModels[c(5, 1, 2, 3, 4)], output = "C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Tables/TableFigA9A10.tex")
  saveRDS(Models, file="C:/Userdata/Shared/Dofiles/DoAnalysis/JanKalle/ReplicationFiles/AnalysisScripts/Tables/LogitProb.RData")
  
sink() 